In [8]:
# %pip install -q ultralytics  # use %pip (not !pip) so the install targets this kernel's environment; pin a version for reproducibility
# !git clone https://github.com/ultralytics/yolov5.git
In [1]:
# NOTE: `!activate` runs in a throwaway subshell and does NOT switch this
# kernel's environment — register the venv as a Jupyter kernel instead.
# !python -m venv venv-yolov5
# !venv-yolov5\Scripts\activate.bat
# !pip install --quiet -r yolov5/requirements.txt
In [ ]:
# Run a pretrained YOLOv8n model on a sample image and draw its detections.
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator
from pathlib import Path

import cv2
import matplotlib.pyplot as plt

model = YOLO('./06-object-detection-using-yolo/yolov8n.pt')
model.info()

results = model('./06-object-detection-using-yolo/bus.jpg')

# Annotator draws on an RGB copy of the original (BGR) image.
annotator = Annotator(cv2.cvtColor(results[0].orig_img, cv2.COLOR_BGR2RGB))
for detection in results[0].boxes:
    xyxy = detection.xyxy[0]  # box corners in (left, top, right, bottom) format
    class_id = int(detection.cls)
    annotator.box_label(xyxy, model.names[class_id])

plt.imshow(annotator.result())
YOLOv8n summary: 129 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\06-object-detection-using-yolo\bus.jpg: 640x480 4 persons, 1 bus, 1 stop sign, 36.8ms
Speed: 1.9ms preprocess, 36.8ms inference, 62.5ms postprocess per image at shape (1, 3, 640, 480)
Out[ ]:
<matplotlib.image.AxesImage at 0x19347013750>
No description has been provided for this image
In [3]:
from ultralytics import YOLO

# Path to the pretrained YOLOv8-nano checkpoint.
weights = '06-object-detection-using-yolo/yolov8n.pt'
model = YOLO(weights)  # load a pretrained model
model.info()
YOLOv8n summary: 129 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs
Out[3]:
(129, 3157200, 0, 8.8575488)
In [4]:
# Detect objects in a street scene and visualise the labelled boxes.
import cv2
import matplotlib.pyplot as plt
from ultralytics.utils.plotting import Annotator

results = model(source='06-object-detection-using-yolo/test.jpg')

annotator = Annotator(cv2.cvtColor(results[0].orig_img, cv2.COLOR_BGR2RGB))
for det in results[0].boxes:
    corners = det.xyxy[0]  # box corners in (left, top, right, bottom) format
    annotator.box_label(corners, model.names[int(det.cls)])

plt.imshow(annotator.result())
image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\06-object-detection-using-yolo\test.jpg: 480x640 1 person, 2 cars, 4 traffic lights, 1 fire hydrant, 35.2ms
Speed: 1.8ms preprocess, 35.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Out[4]:
<matplotlib.image.AxesImage at 0x193698c5950>
No description has been provided for this image
In [5]:
from ultralytics import YOLO
import torch

# Toggle between fine-tuning a pretrained checkpoint and training from scratch.
use_pretrained = True

if use_pretrained:
    print('Using a pretrained model')
    model = YOLO('06-object-detection-using-yolo/yolov8n.pt')  # load a pretrained model
else:
    print('Using a scratch model')
    model = YOLO('06-object-detection-using-yolo/yolov8n.yaml')  # load a scratch model

model.info()

# Training configuration. COCO128 is a small 128-image subset of COCO,
# so a single epoch is enough for a quick smoke test.
epochs = 1
data = '06-object-detection-using-yolo/coco128.yaml'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: this runs training immediately (the cell output below confirms it);
# expect it to take a while on CPU.
results = model.train(data=data, epochs=epochs, device=device)
Using a pretrained model
YOLOv8n summary: 129 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs
engine\trainer: task=detect, mode=train, model=06-object-detection-using-yolo/yolov8n.pt, data=06-object-detection-using-yolo/coco128.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=None, name=train7, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=None, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs\detect\train7

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
  9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
 16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
 19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    897664  ultralytics.nn.modules.head.Detect           [80, [64, 128, 256]]          
Model summary: 129 layers, 3,157,200 parameters, 3,157,184 gradients, 8.9 GFLOPs

Transferred 355/355 items from pretrained weights
TensorBoard: Start with 'tensorboard --logdir runs\detect\train7', view at http://localhost:6006/
Freezing layer 'model.22.dfl.conv.weight'
AMP: running Automatic Mixed Precision (AMP) checks...
AMP: checks passed 
train: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\datasets\coco128\labels\train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|██████████| 128/128 [00:00<?, ?it/s]
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\datasets\coco128\labels\train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|██████████| 128/128 [00:00<?, ?it/s]
Plotting labels to runs\detect\train7\labels.jpg... 
optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
optimizer: AdamW(lr=0.000119, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
TensorBoard: model graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs\detect\train7
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        1/1      2.48G      1.214      1.669      1.271        217        640: 100%|██████████| 8/8 [00:02<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.46it/s]
                   all        128        929      0.645       0.53      0.609       0.45

1 epochs completed in 0.003 hours.
Optimizer stripped from runs\detect\train7\weights\last.pt, 6.5MB
Optimizer stripped from runs\detect\train7\weights\best.pt, 6.5MB

Validating runs\detect\train7\weights\best.pt...
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
Model summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.84it/s]
                   all        128        929      0.648      0.524      0.609       0.45
                person         61        254      0.791      0.672      0.763       0.54
               bicycle          3          6      0.495      0.333      0.315      0.273
                   car         12         46      0.789      0.217      0.276      0.169
            motorcycle          4          5      0.682      0.865      0.898      0.718
              airplane          5          6      0.822      0.777      0.927      0.696
                   bus          5          7      0.552      0.714      0.727      0.675
                 train          3          3      0.533      0.667       0.69      0.586
                 truck          5         12          1      0.324      0.477      0.286
                  boat          2          6      0.256      0.167      0.352      0.235
         traffic light          4         14      0.741      0.207      0.205       0.14
             stop sign          2          2          1      0.991      0.995      0.705
                 bench          5          9      0.826      0.531      0.621      0.363
                  bird          2         16      0.921      0.729      0.858      0.519
                   cat          4          4      0.937          1      0.995      0.778
                   dog          9          9      0.637      0.889      0.806      0.612
                 horse          1          2      0.588          1      0.995       0.52
              elephant          4         17      0.864      0.765      0.889      0.662
                  bear          1          1      0.613          1      0.995      0.995
                 zebra          2          4      0.852          1      0.995      0.965
               giraffe          4          9      0.739          1      0.951      0.718
              backpack          4          6      0.607      0.333      0.394      0.245
              umbrella          4         18       0.82      0.506      0.662      0.429
               handbag          9         19      0.579     0.0774      0.207      0.105
                person         61        254      0.791      0.672      0.763       0.54
               bicycle          3          6      0.495      0.333      0.315      0.273
                   car         12         46      0.789      0.217      0.276      0.169
            motorcycle          4          5      0.682      0.865      0.898      0.718
              airplane          5          6      0.822      0.777      0.927      0.696
                   bus          5          7      0.552      0.714      0.727      0.675
                 train          3          3      0.533      0.667       0.69      0.586
                 truck          5         12          1      0.324      0.477      0.286
                  boat          2          6      0.256      0.167      0.352      0.235
         traffic light          4         14      0.741      0.207      0.205       0.14
             stop sign          2          2          1      0.991      0.995      0.705
                 bench          5          9      0.826      0.531      0.621      0.363
                  bird          2         16      0.921      0.729      0.858      0.519
                   cat          4          4      0.937          1      0.995      0.778
                   dog          9          9      0.637      0.889      0.806      0.612
                 horse          1          2      0.588          1      0.995       0.52
              elephant          4         17      0.864      0.765      0.889      0.662
                  bear          1          1      0.613          1      0.995      0.995
                 zebra          2          4      0.852          1      0.995      0.965
               giraffe          4          9      0.739          1      0.951      0.718
              backpack          4          6      0.607      0.333      0.394      0.245
              umbrella          4         18       0.82      0.506      0.662      0.429
               handbag          9         19      0.579     0.0774      0.207      0.105
                   tie          6          7      0.826      0.714      0.676      0.477
              suitcase          2          4      0.623      0.836      0.745      0.546
               frisbee          5          5      0.684        0.8      0.759      0.655
                  skis          1          1      0.616          1      0.995      0.438
             snowboard          2          7      0.768      0.714      0.745      0.501
           sports ball          6          6      0.707      0.413      0.493      0.286
                  kite          2         10      0.694      0.457      0.536      0.182
          baseball bat          4          4      0.427       0.25      0.352      0.224
        baseball glove          4          7      0.655      0.429      0.429      0.274
            skateboard          3          5       0.78        0.6        0.6       0.44
         tennis racket          5          7      0.627      0.487      0.489      0.338
                bottle          6         18      0.534      0.382       0.39      0.222
            wine glass          5         16       0.64      0.312      0.561      0.322
                   cup         10         36      0.588      0.278       0.41      0.295
                  fork          6          6      0.555      0.167      0.255      0.191
                 knife          7         16      0.674      0.518      0.601      0.358
                 spoon          5         22      0.623      0.182       0.32      0.184
                  bowl          9         28      0.743      0.643      0.646        0.5
                banana          1          1          0          0      0.166     0.0482
              sandwich          2          2          0          0      0.398      0.398
                orange          1          4          1      0.304      0.995      0.623
              broccoli          4         11      0.425      0.182      0.254      0.207
                carrot          3         24      0.728      0.458       0.61      0.376
               hot dog          1          2      0.346        0.5      0.745      0.745
                 pizza          5          5      0.711          1      0.995      0.873
                 donut          2         14      0.658          1      0.927      0.849
                  cake          4          4      0.705          1      0.995       0.88
                 chair          9         35      0.509      0.534      0.451      0.252
                 couch          5          6      0.746      0.492      0.693      0.522
          potted plant          9         14      0.717      0.643      0.738      0.485
                   bed          3          3      0.782      0.667      0.741       0.59
          dining table         10         13      0.433      0.615      0.534      0.415
                toilet          2          2      0.621        0.5      0.745      0.721
                    tv          2          2      0.282        0.5      0.695      0.606
                laptop          2          3          1          0       0.32      0.264
                 mouse          2          2          1          0     0.0454    0.00454
                remote          5          8      0.823        0.5       0.57      0.497
            cell phone          5          8          0          0     0.0532     0.0334
             microwave          3          3      0.395      0.667      0.806      0.718
                  oven          5          5      0.414        0.4       0.34       0.27
                  sink          4          6      0.356      0.167      0.167      0.123
          refrigerator          5          5      0.606        0.4      0.629       0.49
                  book          6         29      0.623      0.115      0.381      0.185
                 clock          8          9      0.786       0.82      0.879      0.743
                  vase          2          2      0.307          1      0.828      0.795
              scissors          1          1          1          0      0.249      0.079
            teddy bear          6         21          1      0.374      0.642      0.406
            toothbrush          2          5      0.638        0.4      0.635      0.371
Speed: 0.3ms preprocess, 4.0ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to runs\detect\train7
In [6]:
import xml.etree.ElementTree as ET

# Function to get the data from XML Annotation
def extract_info_from_xml(xml_file):
    """Parse a Pascal-VOC style XML annotation file.

    Parameters
    ----------
    xml_file : str or file-like object
        Path (or open file) of the VOC annotation XML.

    Returns
    -------
    dict with keys:
        'filename'   : image file name (when present in the XML)
        'image_size' : (width, height, depth) tuple (when present)
        'bboxes'     : list of dicts, each with 'class' plus integer
                       'xmin'/'ymin'/'xmax'/'ymax' box corners.
    """
    root = ET.parse(xml_file).getroot()

    # Initialise the info dict
    info_dict = {'bboxes': []}

    # Parse the XML tree
    for elem in root:
        # Get the file name
        if elem.tag == "filename":
            info_dict['filename'] = elem.text

        # Get the image size; VOC order is <width>, <height>, <depth>
        elif elem.tag == "size":
            info_dict['image_size'] = tuple(int(subelem.text) for subelem in elem)

        # Get details of a bounding box
        elif elem.tag == "object":
            bbox = {}
            for subelem in elem:
                if subelem.tag == "name":
                    bbox["class"] = subelem.text
                elif subelem.tag == "bndbox":
                    for corner in subelem:
                        # int(float(...)) tolerates coordinates written as
                        # floats (e.g. "98.0"), which some VOC exports produce.
                        bbox[corner.tag] = int(float(corner.text))
            info_dict['bboxes'].append(bbox)

    return info_dict

# Dictionary that maps class names to IDs
class_name_to_id_mapping = {"trafficlight": 0,
                           "stop": 1,
                           "speedlimit": 2,
                           "crosswalk": 3}

# Convert the info dict to the required yolo format and write it to disk
def convert_to_yolov5(info_dict, rootpath='.', write_to_file=False):
    """Convert a VOC-style info dict (see extract_info_from_xml) to YOLO lines.

    Each output line is "<class_id> <x_center> <y_center> <width> <height>"
    with coordinates normalised to [0, 1] by the image dimensions.

    Parameters
    ----------
    info_dict : dict with 'bboxes', 'image_size' and (when writing) 'filename'.
    rootpath : folder containing the 'annotations' sub-folder to write into.
    write_to_file : when True, save "<rootpath>/annotations/<name>.txt" and
        return None; otherwise return the list of formatted lines.
    """
    print_buffer = []

    # For each bounding box
    for b in info_dict["bboxes"]:
        try:
            class_id = class_name_to_id_mapping[b["class"]]
        except KeyError:
            print("Invalid Class. Must be one from ", class_name_to_id_mapping.keys())
            # Skip this box: without `continue` the code below would use an
            # undefined (first box) or stale (later boxes) class_id.
            continue

        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width    = (b["xmax"] - b["xmin"])
        b_height   = (b["ymax"] - b["ymin"])

        # Normalise the co-ordinates by the dimensions of the image
        image_w, image_h, image_c = info_dict["image_size"]
        b_center_x /= image_w
        b_center_y /= image_h
        b_width    /= image_w
        b_height   /= image_h

        # Write the bbox details to the buffer
        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

    if write_to_file:
        import os  # local import: `os` is not imported anywhere in this cell

        # Name of the file which we have to save
        save_file_name = os.path.join(rootpath, "annotations", info_dict["filename"].replace("png", "txt"))

        # Context manager ensures the handle is closed (the original
        # open() inside print() leaked it).
        with open(save_file_name, "w") as out_file:
            print("\n".join(print_buffer), file=out_file)
    else:
        return print_buffer
In [7]:
# Sanity-check the XML -> YOLO conversion pipeline on one annotation file.
sample_xml = '06-object-detection-using-yolo/road0.xml'
xml_info = extract_info_from_xml(sample_xml)
print('xml_info:\n', xml_info)

yolo_annotations = convert_to_yolov5(xml_info)
print('yolo_annotations:\n', yolo_annotations)
xml_info:
 {'bboxes': [{'class': 'trafficlight', 'xmin': 98, 'ymin': 62, 'xmax': 208, 'ymax': 232}], 'filename': 'road0.png', 'image_size': (267, 400, 3)}
yolo_annotations:
 ['0 0.573 0.367 0.412 0.425']
In [9]:
import os
from tqdm import tqdm

# Rootpath indicates the root folder where you have stored the road-sign-detection dataset
rootpath = "./road-sign-detection"

annotations_dir = os.path.join(rootpath, 'annotations')

# endswith('.xml') (with the dot) is stricter than the original `x[-3:] == "xml"`,
# which would also match names like "notxml".
annotations = [os.path.join(annotations_dir, x)
               for x in os.listdir(annotations_dir) if x.endswith(".xml")]
annotations.sort()

# Convert every VOC XML annotation into a YOLO-format .txt in the same folder.
for ann in tqdm(annotations):
    info_dict = extract_info_from_xml(ann)
    convert_to_yolov5(info_dict, rootpath=rootpath, write_to_file=True)

# Refresh the list to point at the freshly written .txt label files.
annotations = [os.path.join(annotations_dir, x)
               for x in os.listdir(annotations_dir) if x.endswith(".txt")]
100%|██████████| 877/877 [00:04<00:00, 212.95it/s]
In [13]:
import random
import shutil

from sklearn.model_selection import train_test_split

# Pair images with their label files (sorted so indices correspond).
images = sorted(os.path.join(rootpath, 'images', name)
                for name in os.listdir(os.path.join(rootpath, 'images')))
annotations = sorted(os.path.join(rootpath, 'annotations', name)
                     for name in os.listdir(os.path.join(rootpath, 'annotations'))
                     if name[-3:] == "txt")

# 80% train, then split the remaining 20% evenly into val and test.
train_images, val_images, train_annotations, val_annotations = train_test_split(
    images, annotations, test_size=0.2, random_state=1)
val_images, test_images, val_annotations, test_annotations = train_test_split(
    val_images, val_annotations, test_size=0.5, random_state=1)
In [14]:
def move_files_to_folder(list_of_files, destination_folder):
    """Move each file in `list_of_files` into `destination_folder`.

    Raises
    ------
    RuntimeError
        If any file cannot be moved; the failing path is included in the
        message and the underlying OS error is chained. (The original
        printed the path and tripped a bare `assert False`, which both
        swallowed the real error and is stripped under `python -O`.)
    """
    for f in list_of_files:
        try:
            shutil.move(f, destination_folder)
        except (OSError, shutil.Error) as exc:
            raise RuntimeError(
                f"Failed to move {f!r} to {destination_folder!r}") from exc
In [ ]:
# move_files_to_folder(train_images,destination_folder="./data/images/train")
# move_files_to_folder(val_images,destination_folder="./data/images/val")
# move_files_to_folder(test_images,destination_folder="./data/images/test")
# move_files_to_folder(train_annotations,destination_folder="./data/labels/train")
# move_files_to_folder(val_annotations,destination_folder="./data/labels/val")
# move_files_to_folder(test_annotations,destination_folder="./data/labels/test")

TRAINING - Pretrained with no fine tuning¶

In [57]:
# Baseline: evaluate the COCO-pretrained checkpoint on the road-sign data
# without any fine-tuning (expect near-zero mAP: the classes differ).
print('Using a pretrained model')
model = YOLO('06-object-detection-using-yolo/yolov8n.pt')  # load a pretrained model

results = model.val(data='road_sign.yaml')
print(results.box.map)    # mean Average Precision
Using a pretrained model
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:03<00:00,  1.53it/s]
                   all         88        132    0.00687     0.0439    0.00373   0.000772
                person         10         19    0.00231      0.105    0.00144    0.00114
               bicycle         10         10          0          0          0          0
                   car         69         81    0.00137     0.0247   0.000704   0.000669
            motorcycle         16         22     0.0238     0.0455     0.0128    0.00128
                person         10         19    0.00231      0.105    0.00144    0.00114
               bicycle         10         10          0          0          0          0
                   car         69         81    0.00137     0.0247   0.000704   0.000669
            motorcycle         16         22     0.0238     0.0455     0.0128    0.00128
Speed: 1.2ms preprocess, 6.6ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to runs\detect\val9
0.0007719753207891265
In [58]:
# Visualise predictions on four randomly chosen held-out test images.
test_dir = Path('./data/images/test/')
test_images = list(test_dir.glob('*.png'))
random_images = random.sample(test_images, 4)
plt.figure(figsize=(15, 12))

for idx, image_path in enumerate(random_images):
    # Run inference on one image.
    prediction = model.predict(str(image_path), conf=0.25)

    # plot() returns the annotated image in BGR order; convert for matplotlib.
    rendered = cv2.cvtColor(prediction[0].plot(), cv2.COLOR_BGR2RGB)

    # Place it in a 2x2 grid.
    plt.subplot(2, 2, idx + 1)
    plt.imshow(rendered)
    plt.title(f"Image: {image_path.name}")
    plt.axis('off')

plt.tight_layout()
plt.savefig('random_test_predictions.png')
plt.show()
image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road293.png: 640x480 (no detections), 14.6ms
Speed: 1.7ms preprocess, 14.6ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road445.png: 640x480 1 car, 14.0ms
Speed: 1.6ms preprocess, 14.0ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road183.png: 640x480 2 cars, 13.9ms
Speed: 1.7ms preprocess, 13.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road174.png: 640x480 1 stop sign, 1 clock, 14.0ms
Speed: 1.8ms preprocess, 14.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)
No description has been provided for this image

TRAINING - Pretrained with 5 epochs¶

In [59]:
# Fine-tune the pretrained checkpoint on the road-sign dataset, then re-evaluate.
model = YOLO('06-object-detection-using-yolo/yolov8n.pt')
results = model.train(data='road_sign.yaml', epochs=5)
results = model.val(data='road_sign.yaml')
print(results.box.map)
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
engine\trainer: task=detect, mode=train, model=06-object-detection-using-yolo/yolov8n.pt, data=road_sign.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train14, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=None, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs\detect\train14
Overriding model.yaml nc=80 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
  9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
 16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
 19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    752092  ultralytics.nn.modules.head.Detect           [4, [64, 128, 256]]           
Model summary: 129 layers, 3,011,628 parameters, 3,011,612 gradients, 8.2 GFLOPs

Transferred 319/355 items from pretrained weights
TensorBoard: Start with 'tensorboard --logdir runs\detect\train14', view at http://localhost:6006/
Freezing layer 'model.22.dfl.conv.weight'
AMP: running Automatic Mixed Precision (AMP) checks...
AMP: checks passed 
train: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\train.cache... 701 images, 0 backgrounds, 0 corrupt: 100%|██████████| 701/701 [00:00<?, ?it/s]
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
Plotting labels to runs\detect\train14\labels.jpg... 
optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
optimizer: AdamW(lr=0.00125, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
TensorBoard: model graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs\detect\train14
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        1/5      2.03G     0.7721      2.578     0.9315         32        640: 100%|██████████| 44/44 [00:10<00:00,  4.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.46it/s]
                   all         88        132          1      0.129       0.56      0.464

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        2/5      2.32G     0.7896      1.495     0.9345         50        640: 100%|██████████| 44/44 [00:09<00:00,  4.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.53it/s]
                   all         88        132      0.752      0.646       0.75      0.557

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        3/5      2.34G     0.7502      1.285     0.9333         26        640: 100%|██████████| 44/44 [00:09<00:00,  4.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.55it/s]
                   all         88        132      0.912      0.788      0.888      0.699

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        4/5      2.34G     0.7014      1.089     0.9134         37        640: 100%|██████████| 44/44 [00:09<00:00,  4.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.83it/s]
                   all         88        132      0.915      0.856      0.911      0.739

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        5/5      2.35G     0.6462     0.9812     0.8978         27        640: 100%|██████████| 44/44 [00:10<00:00,  4.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.39it/s]
                   all         88        132      0.922      0.886      0.911       0.73

5 epochs completed in 0.016 hours.
Optimizer stripped from runs\detect\train14\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train14\weights\best.pt, 6.2MB

Validating runs\detect\train14\weights\best.pt...
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
Model summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  2.02it/s]
                   all         88        132      0.915      0.856      0.911      0.736
          trafficlight         10         19      0.811      0.677      0.814      0.496
                  stop         10         10      0.956        0.9      0.897      0.833
            speedlimit         69         81      0.974      0.939      0.989      0.864
             crosswalk         16         22      0.918      0.909      0.943      0.752
Speed: 0.2ms preprocess, 3.7ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to runs\detect\train14
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
Model summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:05<00:00,  1.01it/s]
                   all         88        132      0.928      0.857      0.914      0.741
          trafficlight         10         19      0.865      0.678      0.824      0.508
                  stop         10         10      0.955        0.9      0.897      0.825
            speedlimit         69         81      0.974      0.942      0.989       0.87
          trafficlight         10         19      0.865      0.678      0.824      0.508
                  stop         10         10      0.955        0.9      0.897      0.825
            speedlimit         69         81      0.974      0.942      0.989       0.87
             crosswalk         16         22      0.918      0.909      0.945      0.762
Speed: 1.1ms preprocess, 4.5ms inference, 0.0ms loss, 1.7ms postprocess per image
Results saved to runs\detect\train142
0.7409292578791502
In [60]:
# Visualize the fine-tuned model's predictions on random test images.
import random  # explicit import so the cell survives Restart & Run All

test_dir = Path('./data/images/test/')
test_images = list(test_dir.glob('*.png'))
if not test_images:
    raise FileNotFoundError(f"No .png images found in {test_dir}")

# random.sample raises ValueError if the population is smaller than the
# requested sample size, so clamp to the number of available images.
n_samples = min(4, len(test_images))
random_images = random.sample(test_images, n_samples)

plt.figure(figsize=(15, 12))

for i, img_path in enumerate(random_images):
    # Run inference at a 0.25 confidence threshold.
    results = model.predict(str(img_path), conf=0.25)

    # results[0].plot() returns the annotated image in BGR order;
    # convert to RGB for matplotlib display.
    annotated_img = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

    plt.subplot(2, 2, i + 1)
    plt.imshow(annotated_img)
    plt.title(f"Image: {img_path.name}")
    plt.axis('off')

plt.tight_layout()
plt.savefig('random_test_predictions.png')
plt.show()
image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road550.png: 640x480 1 speedlimit, 22.6ms
Speed: 2.7ms preprocess, 22.6ms inference, 6.4ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road590.png: 640x480 1 speedlimit, 9.3ms
Speed: 2.1ms preprocess, 9.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road143.png: 640x448 1 crosswalk, 9.9ms
Speed: 1.4ms preprocess, 9.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 448)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road408.png: 640x480 1 trafficlight, 1 speedlimit, 10.6ms
Speed: 1.9ms preprocess, 10.6ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 480)
No description has been provided for this image

TRAINING - Training from scratch with 5 epochs¶

In [61]:
# Train YOLOv8n from scratch (random weights) on the road-sign dataset,
# then validate and report mAP50-95.
print('Using a scratch model')
model = YOLO('yolov8n.yaml')  # .yaml config -> untrained architecture (no pretrained weights)

# Keep training and validation results in separate variables: the original
# code reassigned `results`, silently discarding the training metrics.
train_results = model.train(data='road_sign.yaml', epochs=5)
val_results = model.val(data='road_sign.yaml')

# mAP50-95 averaged over all classes.
print(val_results.box.map)
Using a scratch model
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
engine\trainer: task=detect, mode=train, model=yolov8n.yaml, data=road_sign.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train15, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=None, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs\detect\train15
Overriding model.yaml nc=80 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
  9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
 16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
 19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    752092  ultralytics.nn.modules.head.Detect           [4, [64, 128, 256]]           
YOLOv8n summary: 129 layers, 3,011,628 parameters, 3,011,612 gradients, 8.2 GFLOPs

TensorBoard: Start with 'tensorboard --logdir runs\detect\train15', view at http://localhost:6006/
Freezing layer 'model.22.dfl.conv.weight'
AMP: running Automatic Mixed Precision (AMP) checks...
AMP: checks passed 
train: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\train.cache... 701 images, 0 backgrounds, 0 corrupt: 100%|██████████| 701/701 [00:00<?, ?it/s]
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
Plotting labels to runs\detect\train15\labels.jpg... 
optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
optimizer: AdamW(lr=0.00125, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
TensorBoard: model graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs\detect\train15
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        1/5      2.08G      3.536      5.461      4.327         32        640: 100%|██████████| 44/44 [00:10<00:00,  4.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.96it/s]
                   all         88        132   0.000264     0.0724   0.000463   9.84e-05

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        2/5      2.37G      3.344      4.681      3.849         50        640: 100%|██████████| 44/44 [00:09<00:00,  4.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.49it/s]
                   all         88        132   0.000269     0.0724   0.000522   0.000138

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        3/5      2.39G      2.998      4.265      3.396         26        640: 100%|██████████| 44/44 [00:09<00:00,  4.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.39it/s]
                   all         88        132      0.703     0.0404     0.0375      0.016

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        4/5      2.39G      2.575      3.598      2.991         37        640: 100%|██████████| 44/44 [00:10<00:00,  4.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.53it/s]
                   all         88        132      0.822       0.13      0.175     0.0852

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        5/5      2.41G       2.33      3.135      2.697         27        640: 100%|██████████| 44/44 [00:12<00:00,  3.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.03it/s]
                   all         88        132      0.729      0.244       0.27      0.151

5 epochs completed in 0.018 hours.
Optimizer stripped from runs\detect\train15\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train15\weights\best.pt, 6.2MB

Validating runs\detect\train15\weights\best.pt...
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.43it/s]
                   all         88        132      0.729      0.244       0.27      0.152
          trafficlight         10         19          1          0    0.00326   0.000624
                  stop         10         10      0.481        0.3      0.347      0.152
            speedlimit         69         81      0.436      0.676      0.613      0.388
             crosswalk         16         22          1          0      0.117     0.0655
Speed: 0.2ms preprocess, 3.8ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to runs\detect\train15
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:05<00:00,  1.18it/s]
                   all         88        132      0.728      0.245      0.264      0.151
          trafficlight         10         19          1          0    0.00325   0.000627
                  stop         10         10      0.482        0.3      0.347      0.152
            speedlimit         69         81      0.429      0.679      0.615      0.395
          trafficlight         10         19          1          0    0.00325   0.000627
                  stop         10         10      0.482        0.3      0.347      0.152
            speedlimit         69         81      0.429      0.679      0.615      0.395
             crosswalk         16         22          1          0     0.0917      0.056
Speed: 0.9ms preprocess, 4.7ms inference, 0.0ms loss, 1.7ms postprocess per image
Results saved to runs\detect\train152
0.15070529190515633
In [62]:
# Visualize the 5-epoch scratch model's predictions on random test images.
import random  # explicit import so the cell survives Restart & Run All

test_dir = Path('./data/images/test/')
test_images = list(test_dir.glob('*.png'))
if not test_images:
    raise FileNotFoundError(f"No .png images found in {test_dir}")

# random.sample raises ValueError if the population is smaller than the
# requested sample size, so clamp to the number of available images.
n_samples = min(4, len(test_images))
random_images = random.sample(test_images, n_samples)

plt.figure(figsize=(15, 12))

for i, img_path in enumerate(random_images):
    # Run inference at a 0.25 confidence threshold.
    results = model.predict(str(img_path), conf=0.25)

    # results[0].plot() returns the annotated image in BGR order;
    # convert to RGB for matplotlib display.
    annotated_img = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

    plt.subplot(2, 2, i + 1)
    plt.imshow(annotated_img)
    plt.title(f"Image: {img_path.name}")
    plt.axis('off')

plt.tight_layout()
# Use a distinct filename: the earlier prediction cell already saved
# 'random_test_predictions.png', and reusing it overwrote that figure.
plt.savefig('random_test_predictions_scratch_5ep.png')
plt.show()
image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road550.png: 640x480 1 speedlimit, 10.4ms
Speed: 2.2ms preprocess, 10.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road590.png: 640x480 1 speedlimit, 6.5ms
Speed: 1.4ms preprocess, 6.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road143.png: 640x448 (no detections), 11.8ms
Speed: 1.3ms preprocess, 11.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 448)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road408.png: 640x480 2 speedlimits, 9.9ms
Speed: 2.5ms preprocess, 9.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 480)
No description has been provided for this image

TRAINING - Training from scratch with 15 epochs¶

In [63]:
# Train YOLOv8n from scratch (random weights) for 15 epochs to compare
# against the 5-epoch scratch run and the fine-tuned model.
print('Using a scratch model')
model = YOLO('yolov8n.yaml')  # .yaml config -> untrained architecture (no pretrained weights)

# Keep training and validation results in separate variables: the original
# code reassigned `results`, silently discarding the training metrics.
train_results = model.train(data='road_sign.yaml', epochs=15)
val_results = model.val(data='road_sign.yaml')

# mAP50-95 averaged over all classes.
print(val_results.box.map)
Using a scratch model
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
engine\trainer: task=detect, mode=train, model=yolov8n.yaml, data=road_sign.yaml, epochs=15, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train16, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=None, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs\detect\train16
Overriding model.yaml nc=80 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
  9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
 16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
 19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    752092  ultralytics.nn.modules.head.Detect           [4, [64, 128, 256]]           
YOLOv8n summary: 129 layers, 3,011,628 parameters, 3,011,612 gradients, 8.2 GFLOPs

TensorBoard: Start with 'tensorboard --logdir runs\detect\train16', view at http://localhost:6006/
Freezing layer 'model.22.dfl.conv.weight'
AMP: running Automatic Mixed Precision (AMP) checks...
AMP: checks passed 
train: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\train.cache... 701 images, 0 backgrounds, 0 corrupt: 100%|██████████| 701/701 [00:00<?, ?it/s]
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
Plotting labels to runs\detect\train16\labels.jpg... 
optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
optimizer: AdamW(lr=0.00125, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
TensorBoard: model graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs\detect\train16
Starting training for 15 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/15      2.09G      3.536      5.461      4.327         32        640: 100%|██████████| 44/44 [00:10<00:00,  4.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.35it/s]
                   all         88        132   0.000264     0.0724   0.000463   9.84e-05

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       2/15      2.38G      3.314      4.678      3.811         50        640: 100%|██████████| 44/44 [00:09<00:00,  4.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.56it/s]
                   all         88        132   0.000277     0.0724   0.000421   0.000129

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       3/15       2.4G      2.876      4.132      3.261         26        640: 100%|██████████| 44/44 [00:09<00:00,  4.58it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.25it/s]
                   all         88        132      0.579     0.0593     0.0442     0.0102

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       4/15       2.4G      2.393      3.375      2.731         37        640: 100%|██████████| 44/44 [00:10<00:00,  4.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.02it/s]
                   all         88        132      0.551     0.0586     0.0847     0.0456

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       5/15       2.4G       2.06        2.7      2.406         27        640: 100%|██████████| 44/44 [00:09<00:00,  4.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.28it/s]
                   all         88        132      0.718      0.229      0.292      0.179

Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       6/15       2.4G      1.561      2.451      2.053         18        640: 100%|██████████| 44/44 [00:09<00:00,  4.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.35it/s]
                   all         88        132      0.792      0.361      0.392      0.253

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       7/15       2.4G      1.361      2.002      1.865         19        640: 100%|██████████| 44/44 [00:09<00:00,  4.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.64it/s]
                   all         88        132      0.571        0.5      0.524      0.339

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       8/15       2.4G      1.262      1.768      1.722         16        640: 100%|██████████| 44/44 [00:09<00:00,  4.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  5.41it/s]
                   all         88        132      0.879      0.403      0.521      0.371

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       9/15       2.4G      1.182      1.539      1.651         16        640: 100%|██████████| 44/44 [00:10<00:00,  4.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.45it/s]
                   all         88        132      0.897      0.493      0.565        0.4

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      10/15       2.4G      1.149      1.432      1.595         16        640: 100%|██████████| 44/44 [00:10<00:00,  4.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.29it/s]
                   all         88        132      0.871      0.529      0.617      0.437

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      11/15       2.4G       1.11      1.335      1.548         25        640: 100%|██████████| 44/44 [00:10<00:00,  4.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.92it/s]
                   all         88        132      0.982      0.541      0.621      0.421

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      12/15       2.4G      1.075      1.243      1.522         17        640: 100%|██████████| 44/44 [00:09<00:00,  4.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.81it/s]
                   all         88        132      0.962      0.561      0.638       0.44

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      13/15       2.4G       1.03      1.163      1.455         21        640: 100%|██████████| 44/44 [00:10<00:00,  4.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.37it/s]
                   all         88        132      0.793      0.586      0.653      0.468

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      14/15       2.4G     0.9817      1.105      1.421         15        640: 100%|██████████| 44/44 [00:10<00:00,  4.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.91it/s]
                   all         88        132      0.637      0.619      0.658      0.495

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      15/15       2.4G     0.9431      1.064      1.394         16        640: 100%|██████████| 44/44 [00:10<00:00,  4.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.38it/s]
                   all         88        132      0.927      0.615      0.677      0.484

15 epochs completed in 0.052 hours.
Optimizer stripped from runs\detect\train16\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train16\weights\best.pt, 6.2MB

Validating runs\detect\train16\weights\best.pt...
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.25it/s]
                   all         88        132      0.973      0.583      0.661      0.496
          trafficlight         10         19          1          0     0.0686     0.0174
                  stop         10         10          1      0.789      0.878      0.663
            speedlimit         69         81      0.971      0.951      0.975      0.817
             crosswalk         16         22       0.92      0.591      0.721      0.487
Speed: 0.2ms preprocess, 4.0ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to runs\detect\train16
Ultralytics 8.3.93  Python-3.11.0 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs
val: Scanning C:\Users\alihu\Documents\Computer Vision\YOLO\data\labels\val.cache... 88 images, 0 backgrounds, 0 corrupt: 100%|██████████| 88/88 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:04<00:00,  1.29it/s]
                   all         88        132      0.973      0.583      0.661      0.494
          trafficlight         10         19          1          0     0.0677     0.0174
                  stop         10         10          1      0.789      0.878      0.655
            speedlimit         69         81      0.973      0.951      0.975      0.817
             crosswalk         16         22       0.92      0.591      0.722      0.487
          trafficlight         10         19          1          0     0.0677     0.0174
                  stop         10         10          1      0.789      0.878      0.655
            speedlimit         69         81      0.973      0.951      0.975      0.817
             crosswalk         16         22       0.92      0.591      0.722      0.487
Speed: 1.0ms preprocess, 5.0ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to runs\detect\train162
0.49416708353688854
In [65]:
# Visualize the fine-tuned model's predictions on randomly chosen test images.
test_dir = Path('./data/images/test/')
test_images = list(test_dir.glob('*.png'))

# Guard: random.sample raises ValueError when the requested sample size
# exceeds the population, so cap at the number of available images.
n_samples = min(4, len(test_images))
random_images = random.sample(test_images, n_samples)

plt.figure(figsize=(15, 12))

for i, img_path in enumerate(random_images):
    # Run inference; conf=0.25 drops low-confidence detections
    results = model.predict(str(img_path), conf=0.25)

    # results[0].plot() returns the image with boxes/labels drawn (BGR)
    annotated_img = results[0].plot()

    # Convert OpenCV's BGR ordering to RGB for matplotlib
    annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)

    # Display the image in a 2x2 grid
    plt.subplot(2, 2, i + 1)
    plt.imshow(annotated_img)
    plt.title(f"Image: {img_path.name}")
    plt.axis('off')

plt.tight_layout()
plt.savefig('random_test_predictions.png')
plt.show()
image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road672.png: 640x480 1 speedlimit, 8.9ms
Speed: 1.5ms preprocess, 8.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road665.png: 640x480 2 speedlimits, 15.4ms
Speed: 2.3ms preprocess, 15.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road563.png: 640x480 1 speedlimit, 1 crosswalk, 20.3ms
Speed: 2.3ms preprocess, 20.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 c:\Users\alihu\Documents\Computer Vision\YOLO\data\images\test\road433.png: 640x480 1 speedlimit, 11.4ms
Speed: 4.7ms preprocess, 11.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 480)
No description has been provided for this image
In [ ]:
# mAP50 achieved by each of the four training configurations compared above.
precision_values = [0.000771, 0.740929, 0.150705, 0.494167]

model_types = [
    'Pretrained\nNo Fine-tuning',
    'Pretrained\n5 epochs',
    'Scratch\n5 epochs',
    'Scratch\n15 epochs',
]

bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

plt.figure(figsize=(10, 6))
bars = plt.bar(model_types, precision_values, width=0.6, color=bar_colors)

# Annotate each bar with its value, centered just above the top edge.
for bar, value in zip(bars, precision_values):
    x_center = bar.get_x() + bar.get_width() / 2.
    plt.text(x_center, value + 0.01, f'{value:.3f}',
             ha='center', va='bottom', fontsize=11)

plt.ylabel('mAP50 (Precision)', fontsize=12)
plt.title('Precision Comparison Across Different Model Training Approaches', fontsize=14)
plt.ylim(0, 1.0)  # precision-like metrics live in [0, 1]
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Dashed reference line at the best score for quick visual comparison.
plt.axhline(y=max(precision_values), color='red', linestyle='--', alpha=0.3)

plt.tight_layout()
plt.savefig('precision_comparison.png', dpi=300)
plt.show()
No description has been provided for this image
In [ ]: